library(readr)
library(ggplot2)
df <- read.csv("MI.data", header = FALSE)
colnames(df) <- c("ID", "AGE", "SEX", "INF_ANAM", "STENOK_AN", "FK_STENOK", "IBS_POST", "IBS_NASL", "GB", "SIM_GIPERT", "DLIT_AG", "ZSN_A", "nr_11", "nr_01", "nr_02", "nr_03", "nr_04", "nr_07", "nr_08", "np_01", "np_04", "np_05", "np_07", "np_08", "np_09", "np_10", "endocr_01", "endocr_02", "endocr_03", "zab_leg_01", "zab_leg_02", "zab_leg_03", "zab_leg_04", "zab_leg_06", "S_AD_KBRIG", "D_AD_KBRIG", "S_AD_ORIT", "D_AD_ORIT", "O_L_POST", "K_SH_POST", "MP_TP_POST", "SVT_POST", "GT_POST", "FIB_G_POST", "ant_im", "lat_im", "inf_im", "post_im", "IM_PG_P", "ritm_ecg_p_01", "ritm_ecg_p_02", "ritm_ecg_p_04", "ritm_ecg_p_06", "ritm_ecg_p_07", "ritm_ecg_p_08", "n_r_ecg_p_01", "n_r_ecg_p_02", "n_r_ecg_p_03", "n_r_ecg_p_04", "n_r_ecg_p_05", "n_r_ecg_p_06", "n_r_ecg_p_08", "n_r_ecg_p_09", "n_r_ecg_p_10", "n_p_ecg_p_01", "n_p_ecg_p_03", "n_p_ecg_p_04", "n_p_ecg_p_05", "n_p_ecg_p_06", "n_p_ecg_p_07", "n_p_ecg_p_08", "n_p_ecg_p_09", "n_p_ecg_p_10", "n_p_ecg_p_11", "n_p_ecg_p_12", "fibr_ter_01", "fibr_ter_02", "fibr_ter_03", "fibr_ter_05", "fibr_ter_06", "fibr_ter_07", "fibr_ter_08", "GIPO_K", "K_BLOOD", "GIPER_NA", "NA_BLOOD", "ALT_BLOOD", "AST_BLOOD", "KFK_BLOOD", "L_BLOOD", "ROE", "TIME_B_S", "R_AB_1_n", "R_AB_2_n", "R_AB_3_n", "NA_KB", "NOT_NA_KB", "LID_KB", "NITR_S", "NA_R_1_n", "NA_R_2_n", "NA_R_3_n", "NOT_NA_1_n", "NOT_NA_2_n", "NOT_NA_3_n", "LID_S_n", "B_BLOK_S_n", "ANT_CA_S_n", "GEPAR_S_n", "ASP_S_n", "TIKL_S_n", "TRENT_S_n", "FIBR_PREDS", "PREDS_TAH", "JELUD_TAH", "FIBR_JELUD", "A_V_BLOK", "OTEK_LANC", "RAZRIV", "DRESSLER", "ZSN", "REC_IM", "P_IM_STEN", "LET_IS")
df
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df <- data.frame(lapply(df, function(x) {
# 将整数列转换为字符型,以处理 "?"
x <- as.character(x)
# 将 "?" 替换为 NA
x[x == "?"] <- NA
# 再将列转换回整数型
as.integer(x)
}))
selected_vars <- df %>%
select(AGE, SEX, L_BLOOD, DLIT_AG, endocr_01, endocr_02, ROE, fibr_ter_01, fibr_ter_05, fibr_ter_08, B_BLOK_S_n, ant_im,lat_im,inf_im,post_im)
# 查看选定变量的摘要统计信息
summary(selected_vars)
## AGE SEX L_BLOOD DLIT_AG
## Min. :26.00 Min. :0.0000 Min. : 2.000 Min. :0.00
## 1st Qu.:54.00 1st Qu.:0.0000 1st Qu.: 6.000 1st Qu.:0.00
## Median :63.00 Median :1.0000 Median : 8.000 Median :3.00
## Mean :61.86 Mean :0.6265 Mean : 8.337 Mean :3.34
## 3rd Qu.:70.00 3rd Qu.:1.0000 3rd Qu.:10.000 3rd Qu.:7.00
## Max. :92.00 Max. :1.0000 Max. :27.000 Max. :7.00
## NA's :8 NA's :125 NA's :248
## endocr_01 endocr_02 ROE fibr_ter_01
## Min. :0.000 Min. :0.00000 Min. : 1.00 Min. :0.000000
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.: 5.00 1st Qu.:0.000000
## Median :0.000 Median :0.00000 Median : 10.00 Median :0.000000
## Mean :0.135 Mean :0.02485 Mean : 13.44 Mean :0.007692
## 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.: 18.00 3rd Qu.:0.000000
## Max. :1.000 Max. :1.00000 Max. :140.00 Max. :1.000000
## NA's :11 NA's :10 NA's :203 NA's :10
## fibr_ter_05 fibr_ter_08 B_BLOK_S_n ant_im
## Min. :0.000000 Min. :0.000000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.000000 Median :0.000000 Median :0.0000 Median :1.000
## Mean :0.002367 Mean :0.001183 Mean :0.1273 Mean :1.571
## 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.0000 3rd Qu.:4.000
## Max. :1.000000 Max. :1.000000 Max. :1.0000 Max. :4.000
## NA's :10 NA's :10 NA's :11 NA's :83
## lat_im inf_im post_im
## Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :1.0000 Median :0.000 Median :0.0000
## Mean :0.8617 Mean :1.015 Mean :0.2592
## 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :4.0000 Max. :4.000 Max. :4.0000
## NA's :80 NA's :80 NA's :72
#LET_IS变量中不同数值的frequency
df$Status <- ifelse(df$LET_IS == 0, "Alive", "Deceased")
df$Status <- as.factor(df$Status)
df$LET_IS <- as.factor(df$LET_IS)
ggplot(df, aes(x = LET_IS)) +
geom_bar() +
labs(title = "Frequency Distribution of LET_IS Values",
x = "LET_IS Values",
y = "Frequency") +
theme_minimal()
ggplot(df, aes(x = Status)) +
geom_bar() +
labs(title = "Frequency Distribution of Status",
x = "Status",
y = "Frequency") +
theme_minimal()
#不同年龄与性别的患者心梗及其并发症的频率分布
library(ggplot2)
df$SEX <- as.factor(df$SEX)
df$LET_IS <- as.factor(df$LET_IS)
ggplot(df, aes(x = AGE, fill = SEX)) +
geom_bar(position = "dodge") +
facet_wrap(~ LET_IS, scales = "free") +
labs(title = "Distribution of Patients by Age and Gender for Different Lethal outcome",
x = "Age Group",
y = "Count",
fill = "Gender") +
theme_minimal()+
theme(axis.text.x = element_text(size = 5))
## Warning: Removed 8 rows containing non-finite values (`stat_count()`).
#不同gender的存活/死亡的比率
df$SEX <- as.factor(df$SEX)
ggplot(df, aes(x = SEX, fill = Status)) +
geom_bar() +
labs(title = "Stacked Frequency Distribution of Survival Status by Gender",
x = "Gender",
y = "Count",
fill = "Status") +
theme_minimal()
#不同的病史中心肌梗塞的数量导致的存活/死亡的比率
df_no_na <- df[!is.na(df$INF_ANAM), ]
ggplot(df_no_na, aes(x = factor(INF_ANAM), y = ..prop.., group = Status, fill = Status)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Stacked Proportion Distribution of Survival Status by Quantity of myocardial infarctions in the anamnesis. 0: zero 1: one 2: two 3: three and more)",
x = "Quantity of myocardial infarctions",
y = "Proportion") +
theme_minimal()
## Warning: The dot-dot notation (`..prop..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(prop)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#是否高血压导致的存活/死亡的比率
df_no_na <- df[!is.na(df$SIM_GIPERT), ]
ggplot(df_no_na, aes(x = factor(SIM_GIPERT), y = ..prop.., group = Status, fill = Status)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Stacked Proportion Distribution of Survival Status by Symptomatic hypertension",
x = "Survival Status by Symptomatic hypertension",
y = "Proportion") +
theme_minimal()
#不同的既往史中有劳力性心绞痛导致的存活/死亡的比率
df_no_na <- df[!is.na(df$STENOK_AN), ]
ggplot(df_no_na, aes(x = factor(STENOK_AN), y = ..prop.., group = Status, fill = Status)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Stacked Proportion Distribution of Survival Status by Exertional angina pectoris in the anamnesis(0: never 1: during the last year 2: one year ago 3: two years ago 4: three years ago 5: 4-5 years ago)",
x = "Exertional angina pectoris in the anamnesis",
y = "Proportion") +
theme_minimal()
#不同的年龄导致的存活/死亡的比率
df_no_na <- df[!is.na(df$AGE), ]
ggplot(df_no_na, aes(x = AGE, y = ..prop.., group = Status, fill = Status)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Stacked Proportion Distribution of AGE",
x = "AGE",
y = "Proportion") +
theme_minimal()+
theme(axis.text.x = element_text(size = 5))
#不同的红细胞沉降率导致的存活/死亡的比率
df_no_na <- df[!is.na(df$ROE), ]
ggplot(df_no_na, aes(x = ROE, y = ..prop.., group = Status, fill = Status)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Stacked Proportion Distribution of Erythrocyte sedimentation rate",
x = "ROE",
y = "Proportion") +
theme_minimal()
ggplot(df_no_na, aes(x = factor(Status), y = ROE, fill = factor(Status))) +
geom_violin(trim = FALSE) +
labs(title = "Violin Plot of Erythrocyte sedimentation rate by Status",
x = "Status",
y = "ROE") +
theme_minimal()
#不同的白细胞计数导致的存活/死亡的比率
ggplot(df, aes(x = L_BLOOD, y = ..prop.., group = Status, fill = Status)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Stacked Proportion Distribution of White cell count",
x = "White cell count",
y = "Proportion") +
theme_minimal()
## Warning: Removed 125 rows containing non-finite values (`stat_count()`).
ggplot(df_no_na, aes(x = factor(Status), y = L_BLOOD, fill = factor(Status))) +
geom_violin(trim = FALSE) +
labs(title = "Violin Plot of White cell count by Status",
x = "Status",
y = "L_BLOOD") +
theme_minimal()
## Warning: Removed 5 rows containing non-finite values (`stat_ydensity()`).
#糖尿病导致的存活/死亡的比率
df_no_na <- df[!is.na(df$endocr_01), ]
ggplot(df_no_na, aes(x = factor(endocr_01), y = ..prop.., group = Status, fill = Status)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Stacked Proportion Distribution of Survival Status by Diabetes",
x = "Diabetes mellitus in the anamnesis",
y = "Proportion") +
theme_minimal()
#肥胖导致的存活/死亡的比率
df_no_na <- df[!is.na(df$endocr_02 ), ]
ggplot(df_no_na, aes(x = factor(endocr_02), y = ..prop.., group = Status, fill = Status)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Stacked Proportion Distribution of Survival Status by Obesity",
x = "Obesity",
y = "Proportion") +
theme_minimal()
#每个疗法下的存活/死亡比率
library(tidyr)
library(ggplot2)
library(dplyr)
variables <- c("fibr_ter_01", "fibr_ter_02", "fibr_ter_03",
"fibr_ter_05", "fibr_ter_06", "fibr_ter_07", "fibr_ter_08")
df_clean <- df[complete.cases(df[, variables]), ]
# 将数据转换为长格式,以便每种治疗都有对应的条目
df_long <- df_clean %>%
pivot_longer(
cols = fibr_ter_01:fibr_ter_08, # 选择所有治疗变量
names_to = "Therapy",
values_to = "Treatment_Received"
) %>%
mutate(
Treatment_Received = factor(Treatment_Received, levels = c(0, 1), labels = c("No", "Yes"))
)
# 使用ggplot2绘制分面条形图
ggplot(df_long, aes(x = Treatment_Received, fill = Status)) +
geom_bar(position = "fill") +
facet_wrap(~ Therapy, scales = "free_x") + # 每种治疗一个小面
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Survival Rate Proportion by Treatment",
x = "Treatment Received",
y = "Proportion",
fill = "Status") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), # 旋转x轴标签以便于阅读
strip.text.x = element_text(angle = 0)) # 确保小面的标题水平显示
4. Logistic regression
#null model
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
df$Status <- factor(df$Status, levels = c("Alive", "Deceased"))
fit_null <- glm(Status ~ 1, data = df, family = binomial(link = "logit"))
summary(fit_null)
##
## Call:
## glm(formula = Status ~ 1, family = binomial(link = "logit"),
## data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.66261 0.06626 -25.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1491.6 on 1699 degrees of freedom
## Residual deviance: 1491.6 on 1699 degrees of freedom
## AIC: 1493.6
##
## Number of Fisher Scoring iterations: 3
#Confusion MAtrix
predicted_probs <- predict(fit_null, newdata = df, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(df$Status))
cm <- confusionMatrix(predicted_classes, df$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 1429 271
## Deceased 0 0
##
## Accuracy : 0.8406
## 95% CI : (0.8223, 0.8577)
## No Information Rate : 0.8406
## P-Value [Acc > NIR] : 0.5162
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.8406
## Neg Pred Value : NaN
## Prevalence : 0.8406
## Detection Rate : 0.8406
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : Alive
##
# ROC
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
##
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
roc_obj <- roc(df$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the null model")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC:", auc_value, "\n")
## AUC: 0.5
#logistic model
library(tidyverse)
library(caret)
df_all <- as.data.frame(lapply(df, function(x) {
if(is.numeric(x)) {
x[is.na(x)] <- median(x, na.rm = TRUE)
}
return(x)
})
)
set.seed(625)
indices <- createDataPartition(df_all$Status, p = 0.8, list = FALSE)
train_set <- df_all[indices, ]
test_set <- df_all[-indices, ]
fit_all = glm(Status ~ AGE + factor(SEX) +L_BLOOD + factor(DLIT_AG) + factor(endocr_01) + factor(endocr_02) + ROE +
factor(fibr_ter_01) + factor(fibr_ter_05) + factor(fibr_ter_08) + factor(B_BLOK_S_n) + ant_im + lat_im + inf_im + post_im,
data = train_set,
family = binomial(link = "logit"))
#Confusion Matrix
predicted_probs <- predict(fit_all, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(df$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 277 51
## Deceased 8 3
##
## Accuracy : 0.826
## 95% CI : (0.7813, 0.8648)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.7945
##
## Kappa : 0.0406
##
## Mcnemar's Test P-Value : 4.553e-08
##
## Sensitivity : 0.97193
## Specificity : 0.05556
## Pos Pred Value : 0.84451
## Neg Pred Value : 0.27273
## Prevalence : 0.84071
## Detection Rate : 0.81711
## Detection Prevalence : 0.96755
## Balanced Accuracy : 0.51374
##
## 'Positive' Class : Alive
##
# ROC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the null model")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC:", auc_value, "\n")
## AUC: 0.7033788
# Test for the collinearity
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:purrr':
##
## some
## The following object is masked from 'package:dplyr':
##
## recode
vif(fit_all)
## GVIF Df GVIF^(1/(2*Df))
## AGE 1.231701 1 1.109820
## factor(SEX) 1.303317 1 1.141629
## L_BLOOD 1.033607 1 1.016664
## factor(DLIT_AG) 1.225596 7 1.014637
## factor(endocr_01) 1.123386 1 1.059899
## factor(endocr_02) 1.046852 1 1.023158
## ROE 1.076991 1 1.037782
## factor(fibr_ter_01) 1.033340 1 1.016533
## factor(fibr_ter_05) 1.000000 1 1.000000
## factor(fibr_ter_08) 1.000000 1 1.000000
## factor(B_BLOK_S_n) 1.063993 1 1.031500
## ant_im 1.914410 1 1.383622
## lat_im 1.504295 1 1.226497
## inf_im 1.602693 1 1.265975
## post_im 1.251508 1 1.118708
reduced_model <- step(fit_all, direction="backward")
## Start: AIC=1072.6
## Status ~ AGE + factor(SEX) + L_BLOOD + factor(DLIT_AG) + factor(endocr_01) +
## factor(endocr_02) + ROE + factor(fibr_ter_01) + factor(fibr_ter_05) +
## factor(fibr_ter_08) + factor(B_BLOK_S_n) + ant_im + lat_im +
## inf_im + post_im
##
## Df Deviance AIC
## - factor(DLIT_AG) 7 1038.4 1068.4
## - factor(fibr_ter_01) 1 1028.9 1070.9
## - factor(fibr_ter_08) 1 1028.9 1070.9
## - post_im 1 1029.0 1071.0
## - factor(SEX) 1 1029.2 1071.2
## - factor(endocr_01) 1 1029.5 1071.5
## - factor(fibr_ter_05) 1 1030.5 1072.5
## <none> 1028.6 1072.6
## - ROE 1 1032.1 1074.1
## - lat_im 1 1033.2 1075.2
## - factor(B_BLOK_S_n) 1 1033.2 1075.2
## - factor(endocr_02) 1 1035.5 1077.5
## - inf_im 1 1039.3 1081.3
## - L_BLOOD 1 1050.9 1092.9
## - ant_im 1 1051.8 1093.8
## - AGE 1 1062.4 1104.4
##
## Step: AIC=1068.43
## Status ~ AGE + factor(SEX) + L_BLOOD + factor(endocr_01) + factor(endocr_02) +
## ROE + factor(fibr_ter_01) + factor(fibr_ter_05) + factor(fibr_ter_08) +
## factor(B_BLOK_S_n) + ant_im + lat_im + inf_im + post_im
##
## Df Deviance AIC
## - factor(fibr_ter_01) 1 1038.7 1066.7
## - post_im 1 1038.8 1066.8
## - factor(fibr_ter_08) 1 1038.8 1066.8
## - factor(SEX) 1 1039.5 1067.5
## - factor(endocr_01) 1 1039.7 1067.7
## - factor(fibr_ter_05) 1 1040.4 1068.4
## <none> 1038.4 1068.4
## - ROE 1 1041.8 1069.8
## - factor(B_BLOK_S_n) 1 1043.1 1071.1
## - lat_im 1 1043.2 1071.2
## - factor(endocr_02) 1 1045.3 1073.3
## - inf_im 1 1049.1 1077.1
## - ant_im 1 1060.8 1088.8
## - L_BLOOD 1 1061.6 1089.6
## - AGE 1 1076.7 1104.7
##
## Step: AIC=1066.73
## Status ~ AGE + factor(SEX) + L_BLOOD + factor(endocr_01) + factor(endocr_02) +
## ROE + factor(fibr_ter_05) + factor(fibr_ter_08) + factor(B_BLOK_S_n) +
## ant_im + lat_im + inf_im + post_im
##
## Df Deviance AIC
## - post_im 1 1039.0 1065.0
## - factor(fibr_ter_08) 1 1039.1 1065.1
## - factor(SEX) 1 1039.7 1065.7
## - factor(endocr_01) 1 1040.0 1066.0
## - factor(fibr_ter_05) 1 1040.7 1066.7
## <none> 1038.7 1066.7
## - ROE 1 1042.0 1068.0
## - factor(B_BLOK_S_n) 1 1043.2 1069.2
## - lat_im 1 1043.6 1069.6
## - factor(endocr_02) 1 1045.9 1071.9
## - inf_im 1 1049.4 1075.4
## - ant_im 1 1061.2 1087.2
## - L_BLOOD 1 1062.2 1088.2
## - AGE 1 1076.9 1102.9
##
## Step: AIC=1065.04
## Status ~ AGE + factor(SEX) + L_BLOOD + factor(endocr_01) + factor(endocr_02) +
## ROE + factor(fibr_ter_05) + factor(fibr_ter_08) + factor(B_BLOK_S_n) +
## ant_im + lat_im + inf_im
##
## Df Deviance AIC
## - factor(fibr_ter_08) 1 1039.4 1063.4
## - factor(SEX) 1 1040.1 1064.1
## - factor(endocr_01) 1 1040.2 1064.2
## - factor(fibr_ter_05) 1 1040.9 1064.9
## <none> 1039.0 1065.0
## - ROE 1 1042.4 1066.4
## - factor(B_BLOK_S_n) 1 1043.7 1067.7
## - lat_im 1 1044.0 1068.0
## - factor(endocr_02) 1 1046.1 1070.1
## - inf_im 1 1051.1 1075.1
## - ant_im 1 1061.3 1085.3
## - L_BLOOD 1 1062.9 1086.9
## - AGE 1 1077.0 1101.0
##
## Step: AIC=1063.39
## Status ~ AGE + factor(SEX) + L_BLOOD + factor(endocr_01) + factor(endocr_02) +
## ROE + factor(fibr_ter_05) + factor(B_BLOK_S_n) + ant_im +
## lat_im + inf_im
##
## Df Deviance AIC
## - factor(SEX) 1 1040.4 1062.4
## - factor(endocr_01) 1 1040.5 1062.5
## - factor(fibr_ter_05) 1 1041.3 1063.3
## <none> 1039.4 1063.4
## - ROE 1 1042.8 1064.8
## - factor(B_BLOK_S_n) 1 1044.0 1066.0
## - lat_im 1 1044.4 1066.4
## - factor(endocr_02) 1 1046.4 1068.4
## - inf_im 1 1051.4 1073.4
## - ant_im 1 1061.5 1083.5
## - L_BLOOD 1 1063.3 1085.3
## - AGE 1 1077.3 1099.3
##
## Step: AIC=1062.42
## Status ~ AGE + L_BLOOD + factor(endocr_01) + factor(endocr_02) +
## ROE + factor(fibr_ter_05) + factor(B_BLOK_S_n) + ant_im +
## lat_im + inf_im
##
## Df Deviance AIC
## - factor(endocr_01) 1 1042.2 1062.2
## - factor(fibr_ter_05) 1 1042.3 1062.3
## <none> 1040.4 1062.4
## - ROE 1 1044.3 1064.3
## - factor(B_BLOK_S_n) 1 1045.1 1065.1
## - lat_im 1 1045.7 1065.7
## - factor(endocr_02) 1 1048.1 1068.1
## - inf_im 1 1052.0 1072.0
## - ant_im 1 1062.2 1082.2
## - L_BLOOD 1 1064.4 1084.4
## - AGE 1 1087.7 1107.7
##
## Step: AIC=1062.21
## Status ~ AGE + L_BLOOD + factor(endocr_02) + ROE + factor(fibr_ter_05) +
## factor(B_BLOK_S_n) + ant_im + lat_im + inf_im
##
## Df Deviance AIC
## - factor(fibr_ter_05) 1 1044.1 1062.1
## <none> 1042.2 1062.2
## - ROE 1 1046.9 1064.9
## - factor(B_BLOK_S_n) 1 1047.3 1065.3
## - lat_im 1 1047.6 1065.6
## - factor(endocr_02) 1 1050.3 1068.3
## - inf_im 1 1054.4 1072.4
## - ant_im 1 1064.3 1082.3
## - L_BLOOD 1 1066.0 1084.0
## - AGE 1 1090.8 1108.8
##
## Step: AIC=1062.14
## Status ~ AGE + L_BLOOD + factor(endocr_02) + ROE + factor(B_BLOK_S_n) +
## ant_im + lat_im + inf_im
##
## Df Deviance AIC
## <none> 1044.1 1062.1
## - ROE 1 1048.9 1064.9
## - factor(B_BLOK_S_n) 1 1049.2 1065.2
## - lat_im 1 1049.2 1065.2
## - factor(endocr_02) 1 1052.2 1068.2
## - inf_im 1 1056.2 1072.2
## - ant_im 1 1066.0 1082.0
## - L_BLOOD 1 1067.1 1083.1
## - AGE 1 1092.9 1108.9
summary(reduced_model)
##
## Call:
## glm(formula = Status ~ AGE + L_BLOOD + factor(endocr_02) + ROE +
## factor(B_BLOK_S_n) + ant_im + lat_im + inf_im, family = binomial(link = "logit"),
## data = train_set)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.122304 0.611701 -11.643 < 2e-16 ***
## AGE 0.053063 0.007964 6.663 2.68e-11 ***
## L_BLOOD 0.105273 0.021620 4.869 1.12e-06 ***
## factor(endocr_02)1 1.172718 0.389158 3.013 0.00258 **
## ROE 0.015948 0.007155 2.229 0.02582 *
## factor(B_BLOK_S_n)1 -0.662011 0.314782 -2.103 0.03546 *
## ant_im 0.279873 0.059886 4.673 2.96e-06 ***
## lat_im 0.229466 0.100733 2.278 0.02273 *
## inf_im 0.230443 0.065895 3.497 0.00047 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1194.3 on 1360 degrees of freedom
## Residual deviance: 1044.1 on 1352 degrees of freedom
## AIC: 1062.1
##
## Number of Fisher Scoring iterations: 5
reduced_fit = glm(formula = Status ~ AGE + L_BLOOD + factor(DLIT_AG) + factor(endocr_01) +
factor(endocr_02) + factor(B_BLOK_S_n) +factor(fibr_ter_05) + ant_im + lat_im + inf_im, family = binomial(link = "logit"),
data = df_all)
summary(reduced_fit)
##
## Call:
## glm(formula = Status ~ AGE + L_BLOOD + factor(DLIT_AG) + factor(endocr_01) +
## factor(endocr_02) + factor(B_BLOK_S_n) + factor(fibr_ter_05) +
## ant_im + lat_im + inf_im, family = binomial(link = "logit"),
## data = df_all)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.028532 0.556695 -12.625 < 2e-16 ***
## AGE 0.052312 0.007287 7.178 7.05e-13 ***
## L_BLOOD 0.104602 0.019637 5.327 9.99e-08 ***
## factor(DLIT_AG)1 -0.676217 0.496733 -1.361 0.17341
## factor(DLIT_AG)2 -0.093403 0.478675 -0.195 0.84529
## factor(DLIT_AG)3 0.418651 0.209724 1.996 0.04591 *
## factor(DLIT_AG)4 0.771002 0.560332 1.376 0.16883
## factor(DLIT_AG)5 -0.213263 0.419142 -0.509 0.61089
## factor(DLIT_AG)6 0.725070 0.242464 2.990 0.00279 **
## factor(DLIT_AG)7 0.251408 0.199078 1.263 0.20664
## factor(endocr_01)1 0.321610 0.188145 1.709 0.08738 .
## factor(endocr_02)1 1.071470 0.357238 2.999 0.00271 **
## factor(B_BLOK_S_n)1 -0.650860 0.285223 -2.282 0.02249 *
## factor(fibr_ter_05)1 -13.451325 410.150615 -0.033 0.97384
## ant_im 0.238284 0.054255 4.392 1.12e-05 ***
## lat_im 0.243534 0.093237 2.612 0.00900 **
## inf_im 0.218676 0.059538 3.673 0.00024 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1491.6 on 1699 degrees of freedom
## Residual deviance: 1296.6 on 1683 degrees of freedom
## AIC: 1330.6
##
## Number of Fisher Scoring iterations: 13
# this block deals with the data, to generate 2 columns: AgeGroup, and DLIT_AG
selected_df = df_all[, c("Status", "AGE", "L_BLOOD", "DLIT_AG", "endocr_01", "endocr_02", "B_BLOK_S_n","fibr_ter_05",
"ant_im", "lat_im", "inf_im")]
selected_df$AgeGroup <- cut(selected_df$AGE,
breaks = c(-Inf, 50, 60, 70, Inf),
labels = c("<50", "50-60", "60-70", ">70"),
right = FALSE)
selected_df$DLIT_AG = factor(selected_df$DLIT_AG)
# conver the data type
selected_df$Status = as.factor(selected_df$Status)
selected_df$endocr_01 = as.factor(selected_df$endocr_01)
selected_df$endocr_02 = as.factor(selected_df$endocr_02)
selected_df$B_BLOK_S_n = as.factor(selected_df$B_BLOK_S_n)
selected_df$fibr_ter_05 = as.factor(selected_df$fibr_ter_05)
# output the dataset
write.csv(selected_df, file = "selected_df.csv", row.names = FALSE)
set.seed(119)
# start to do the multilevel logistic regression
selected_df <- read_csv("selected_df.csv")
## Rows: 1700 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Status, AgeGroup
## dbl (10): AGE, L_BLOOD, DLIT_AG, endocr_01, endocr_02, B_BLOK_S_n, fibr_ter_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
selected_df$Status = as.factor(selected_df$Status)
selected_df$endocr_01 = as.factor(selected_df$endocr_01)
selected_df$endocr_02 = as.factor(selected_df$endocr_02)
selected_df$B_BLOK_S_n = as.factor(selected_df$B_BLOK_S_n)
selected_df$fibr_ter_05 = as.factor(selected_df$fibr_ter_05)
indices <- createDataPartition(selected_df$Status, p = 0.8, list = FALSE)
train_set <- selected_df[indices, ]
test_set <- selected_df[-indices, ]
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
# Complete Pooling(ignore the age group)
m0 <- glm(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im,
data = train_set,
family = binomial(link="logit")
)
print(summary(m0))
##
## Call:
## glm(formula = Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 +
## B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im, family = binomial(link = "logit"),
## data = train_set)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.53922 0.27601 -12.823 < 2e-16 ***
## L_BLOOD 0.09978 0.02143 4.657 3.21e-06 ***
## DLIT_AG 0.07933 0.02751 2.883 0.00393 **
## endocr_011 0.38912 0.20438 1.904 0.05693 .
## endocr_021 0.72892 0.40777 1.788 0.07384 .
## B_BLOK_S_n1 -0.87389 0.30937 -2.825 0.00473 **
## fibr_ter_051 -13.74471 427.00959 -0.032 0.97432
## ant_im 0.28027 0.05898 4.752 2.01e-06 ***
## lat_im 0.08733 0.10014 0.872 0.38317
## inf_im 0.15359 0.06703 2.291 0.02195 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1194.3 on 1360 degrees of freedom
## Residual deviance: 1100.5 on 1351 degrees of freedom
## AIC: 1120.5
##
## Number of Fisher Scoring iterations: 13
# Confusion Matrix
predicted_probs <- predict(m0, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 51
## Deceased 2 3
##
## Accuracy : 0.8437
## 95% CI : (0.8005, 0.8806)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4771
##
## Kappa : 0.0768
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.99298
## Specificity : 0.05556
## Pos Pred Value : 0.84731
## Neg Pred Value : 0.60000
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98525
## Balanced Accuracy : 0.52427
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7169266
library(lme4)
# No Pooling
m <- glm(Status ~ factor(AgeGroup) + L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im,
data = train_set,
family = binomial(link="logit")
)
print(summary(m))
##
## Call:
## glm(formula = Status ~ factor(AgeGroup) + L_BLOOD + DLIT_AG +
## endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im +
## lat_im + inf_im, family = binomial(link = "logit"), data = train_set)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.85796 0.47359 -10.258 < 2e-16 ***
## factor(AgeGroup)>70 1.88568 0.41829 4.508 6.54e-06 ***
## factor(AgeGroup)50-60 0.92494 0.43116 2.145 0.031932 *
## factor(AgeGroup)60-70 1.51792 0.41227 3.682 0.000232 ***
## L_BLOOD 0.10476 0.02185 4.794 1.63e-06 ***
## DLIT_AG 0.03928 0.02855 1.376 0.168920
## endocr_011 0.29446 0.20643 1.426 0.153729
## endocr_021 0.83277 0.41524 2.005 0.044911 *
## B_BLOK_S_n1 -0.64678 0.31360 -2.062 0.039164 *
## fibr_ter_051 -13.65208 426.39842 -0.032 0.974458
## ant_im 0.26827 0.05928 4.525 6.03e-06 ***
## lat_im 0.13374 0.10268 1.303 0.192744
## inf_im 0.15163 0.06726 2.254 0.024165 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1194.3 on 1360 degrees of freedom
## Residual deviance: 1062.8 on 1348 degrees of freedom
## AIC: 1088.8
##
## Number of Fisher Scoring iterations: 13
# Confusion Matrix
predicted_probs <- predict(m, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 51
## Deceased 2 3
##
## Accuracy : 0.8437
## 95% CI : (0.8005, 0.8806)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4771
##
## Kappa : 0.0768
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.99298
## Specificity : 0.05556
## Pos Pred Value : 0.84731
## Neg Pred Value : 0.60000
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98525
## Balanced Accuracy : 0.52427
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7798246
library(lme4)
# Partial Pooling
# multilevel model 1 of random intercept
m1 <- glmer(Status ~ 1 + (1 | AgeGroup),
data = train_set,
family = binomial(link="logit"),
nAGQ = 1)
print(summary(m1))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Status ~ 1 + (1 | AgeGroup)
## Data: train_set
##
## AIC BIC logLik deviance df.resid
## 1163.8 1174.3 -579.9 1159.8 1359
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.5552 -0.4671 -0.3450 -0.2340 4.2735
##
## Random effects:
## Groups Name Variance Std.Dev.
## AgeGroup (Intercept) 0.4839 0.6956
## Number of obs: 1361, groups: AgeGroup, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.9463 0.3644 -5.341 9.26e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Confusion Matrix
predicted_probs <- predict(m1, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 285 54
## Deceased 0 0
##
## Accuracy : 0.8407
## 95% CI : (0.7973, 0.878)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.5362
##
## Kappa : 0
##
## Mcnemar's Test P-Value : 5.498e-13
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.8407
## Neg Pred Value : NaN
## Prevalence : 0.8407
## Detection Rate : 0.8407
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.6943145
# multilevel model 2 of random intercept and random slope of structured variable "endocr_01"
m2 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im
+ (1 + endocr_01 | AgeGroup),
data = train_set,
family = binomial(link="logit"),
nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m2))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +
## fibr_ter_05 + ant_im + lat_im + inf_im + (1 + endocr_01 | AgeGroup)
## Data: train_set
##
## AIC BIC logLik deviance df.resid
## 1102.6 1170.4 -538.3 1076.6 1348
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4847 -0.4559 -0.3322 -0.2134 5.5174
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## AgeGroup (Intercept) 0.4236 0.6509
## endocr_011 0.1032 0.3212 -1.00
## Number of obs: 1361, groups: AgeGroup, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.72736 0.43579 -8.553 < 2e-16 ***
## L_BLOOD 0.10344 0.02181 4.743 2.11e-06 ***
## DLIT_AG 0.04290 0.02855 1.503 0.1329
## endocr_011 0.50431 0.31753 1.588 0.1122
## endocr_021 0.82442 0.41467 1.988 0.0468 *
## B_BLOK_S_n1 -0.66438 0.31276 -2.124 0.0337 *
## fibr_ter_051 -15.33771 120.67965 -0.127 0.8989
## ant_im 0.27048 0.05917 4.571 4.85e-06 ***
## lat_im 0.12806 0.10227 1.252 0.2105
## inf_im 0.15229 0.06708 2.270 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD -0.418
## DLIT_AG -0.213 -0.026
## endocr_011 -0.423 -0.002 -0.115
## endocr_021 -0.020 0.035 -0.096 -0.026
## B_BLOK_S_n1 -0.076 0.037 -0.053 0.062 0.028
## fibr_tr_051 -0.001 0.000 0.000 0.000 -0.001 0.000
## ant_im -0.198 -0.073 0.025 0.008 -0.010 0.004 0.000
## lat_im -0.146 -0.041 0.053 -0.006 0.005 -0.056 0.000 -0.426
## inf_im -0.273 -0.085 0.079 -0.026 -0.049 0.062 0.000 0.443 0.136
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m2, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 51
## Deceased 2 3
##
## Accuracy : 0.8437
## 95% CI : (0.8005, 0.8806)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4771
##
## Kappa : 0.0768
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.99298
## Specificity : 0.05556
## Pos Pred Value : 0.84731
## Neg Pred Value : 0.60000
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98525
## Balanced Accuracy : 0.52427
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7767706
# multilevel model 3 of random intercept and random slope of structured variable "endocr_02"
m3 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im +
(1 + endocr_02| AgeGroup) ,
data = train_set,
family = binomial(link="logit"),
nAGQ = 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
print(summary(m3))
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +
## fibr_ter_05 + ant_im + lat_im + inf_im + (1 + endocr_02 | AgeGroup)
## Data: train_set
##
## AIC BIC logLik deviance df.resid
## 1103.5 1171.3 -538.8 1077.5 1348
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4570 -0.4534 -0.3341 -0.2148 5.4349
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## AgeGroup (Intercept) 0.399143 0.63178
## endocr_021 0.005527 0.07434 -1.00
## Number of obs: 1361, groups: AgeGroup, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.73164 0.42676 -8.744 < 2e-16 ***
## L_BLOOD 0.10451 0.02178 4.798 1.60e-06 ***
## DLIT_AG 0.04319 0.02846 1.517 0.1292
## endocr_011 0.30262 0.20614 1.468 0.1421
## endocr_021 0.86283 0.41471 2.081 0.0375 *
## B_BLOK_S_n1 -0.67208 0.31317 -2.146 0.0319 *
## fibr_ter_051 -13.83831 775.40205 -0.018 0.9858
## ant_im 0.26973 0.05921 4.555 5.23e-06 ***
## lat_im 0.12881 0.10219 1.260 0.2075
## inf_im 0.15173 0.06718 2.259 0.0239 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD -0.425
## DLIT_AG -0.230 -0.023
## endocr_011 -0.054 0.044 -0.167
## endocr_021 -0.088 0.032 -0.098 -0.017
## B_BLOK_S_n1 -0.072 0.037 -0.048 0.075 0.028
## fibr_tr_051 0.000 0.000 0.000 0.000 0.000 0.000
## ant_im -0.204 -0.075 0.025 -0.003 -0.010 0.003 0.000
## lat_im -0.146 -0.041 0.057 -0.001 0.006 -0.054 0.000 -0.425
## inf_im -0.279 -0.086 0.080 -0.054 -0.048 0.062 0.000 0.445 0.136
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
# Confusion Matrix
predicted_probs <- predict(m3, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 51
## Deceased 2 3
##
## Accuracy : 0.8437
## 95% CI : (0.8005, 0.8806)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4771
##
## Kappa : 0.0768
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.99298
## Specificity : 0.05556
## Pos Pred Value : 0.84731
## Neg Pred Value : 0.60000
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98525
## Balanced Accuracy : 0.52427
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7751462
# multilevel model 4 of random intercept and random slope of structured variable "L_BLOOD"
m4 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n
+ fibr_ter_05 + ant_im + lat_im + inf_im + (1 + L_BLOOD | AgeGroup),
data = train_set,
family = binomial(link="logit"),
nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m4))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +
## fibr_ter_05 + ant_im + lat_im + inf_im + (1 + L_BLOOD | AgeGroup)
## Data: train_set
##
## AIC BIC logLik deviance df.resid
## 1103.1 1170.9 -538.5 1077.1 1348
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6474 -0.4527 -0.3318 -0.2151 5.2838
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## AgeGroup (Intercept) 0.1855187 0.4307
## L_BLOOD 0.0005381 0.0232 1.00
## Number of obs: 1361, groups: AgeGroup, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.63721 0.38480 -9.452 < 2e-16 ***
## L_BLOOD 0.09333 0.03028 3.082 0.00205 **
## DLIT_AG 0.04347 0.02859 1.520 0.12844
## endocr_011 0.31284 0.20684 1.513 0.13040
## endocr_021 0.83670 0.41345 2.024 0.04300 *
## B_BLOK_S_n1 -0.66754 0.31258 -2.136 0.03271 *
## fibr_ter_051 -16.25381 128.00009 -0.127 0.89895
## ant_im 0.26966 0.05925 4.551 5.34e-06 ***
## lat_im 0.13073 0.10246 1.276 0.20198
## inf_im 0.15334 0.06734 2.277 0.02278 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD -0.328
## DLIT_AG -0.239 -0.025
## endocr_011 -0.033 -0.006 -0.162
## endocr_021 -0.017 0.017 -0.097 -0.016
## B_BLOK_S_n1 -0.078 0.014 -0.052 0.075 0.027
## fibr_tr_051 0.001 -0.001 0.000 0.000 0.000 0.000
## ant_im -0.224 -0.056 0.025 -0.003 -0.009 0.004 0.000
## lat_im -0.154 -0.048 0.053 0.001 0.005 -0.050 0.000 -0.424
## inf_im -0.296 -0.084 0.080 -0.051 -0.048 0.065 0.000 0.445 0.138
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m4, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 50
## Deceased 2 4
##
## Accuracy : 0.8466
## 95% CI : (0.8038, 0.8833)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4182
##
## Kappa : 0.1048
##
## Mcnemar's Test P-Value : 7.138e-11
##
## Sensitivity : 0.99298
## Specificity : 0.07407
## Pos Pred Value : 0.84985
## Neg Pred Value : 0.66667
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98230
## Balanced Accuracy : 0.53353
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7775504
# multilevel model 5 of random intercept and random slope of structured variable "DLIT_AG"
m5 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im +
(1 + DLIT_AG | AgeGroup),
data = train_set,
family = binomial(link="logit"),
nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m5))
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +
## fibr_ter_05 + ant_im + lat_im + inf_im + (1 + DLIT_AG | AgeGroup)
## Data: train_set
##
## AIC BIC logLik deviance df.resid
## 1102.6 1170.4 -538.3 1076.6 1348
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4762 -0.4532 -0.3323 -0.2139 5.8333
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## AgeGroup (Intercept) 0.28198 0.53102
## DLIT_AG 0.00177 0.04207 1.00
## Number of obs: 1361, groups: AgeGroup, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.70746 0.38963 -9.515 < 2e-16 ***
## L_BLOOD 0.10532 0.02180 4.831 1.36e-06 ***
## DLIT_AG 0.02004 0.03655 0.548 0.5834
## endocr_011 0.29987 0.20644 1.453 0.1463
## endocr_021 0.84672 0.41525 2.039 0.0414 *
## B_BLOK_S_n1 -0.65772 0.31332 -2.099 0.0358 *
## fibr_ter_051 -14.31855 962.91266 -0.015 0.9881
## ant_im 0.27003 0.05929 4.554 5.25e-06 ***
## lat_im 0.12908 0.10218 1.263 0.2065
## inf_im 0.15475 0.06730 2.299 0.0215 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD -0.466
## DLIT_AG 0.214 -0.022
## endocr_011 -0.060 0.044 -0.124
## endocr_021 -0.021 0.032 -0.077 -0.016
## B_BLOK_S_n1 -0.080 0.038 -0.047 0.072 0.029
## fibr_tr_051 0.000 0.000 0.000 0.000 0.000 0.000
## ant_im -0.224 -0.075 0.022 -0.003 -0.010 0.003 0.000
## lat_im -0.160 -0.041 0.039 -0.001 0.005 -0.051 0.000 -0.423
## inf_im -0.307 -0.085 0.060 -0.055 -0.048 0.066 0.000 0.447 0.138
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m5, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 51
## Deceased 2 3
##
## Accuracy : 0.8437
## 95% CI : (0.8005, 0.8806)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4771
##
## Kappa : 0.0768
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.99298
## Specificity : 0.05556
## Pos Pred Value : 0.84731
## Neg Pred Value : 0.60000
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98525
## Balanced Accuracy : 0.52427
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7737167
# multilevel model 6 of random intercept and random slope of structured variable "B_BLOK_S_n"
m6 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im +
(1 + B_BLOK_S_n | AgeGroup),
data = train_set,
family = binomial(link="logit"),
nAGQ = 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
print(summary(m6))
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +
## fibr_ter_05 + ant_im + lat_im + inf_im + (1 + B_BLOK_S_n | AgeGroup)
## Data: train_set
##
## AIC BIC logLik deviance df.resid
## 1101.0 1168.8 -537.5 1075.0 1348
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4820 -0.4527 -0.3381 -0.2056 6.9733
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## AgeGroup (Intercept) 0.3456 0.5879
## B_BLOK_S_n1 0.5942 0.7708 1.00
## Number of obs: 1361, groups: AgeGroup, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.72806 0.41064 -9.079 < 2e-16 ***
## L_BLOOD 0.10470 0.02175 4.815 1.47e-06 ***
## DLIT_AG 0.04365 0.02845 1.534 0.1249
## endocr_011 0.30719 0.20600 1.491 0.1359
## endocr_021 0.82917 0.41419 2.002 0.0453 *
## B_BLOK_S_n1 -0.99166 0.51093 -1.941 0.0523 .
## fibr_ter_051 -13.29849 580.59288 -0.023 0.9817
## ant_im 0.27041 0.05927 4.563 5.05e-06 ***
## lat_im 0.13776 0.10296 1.338 0.1809
## inf_im 0.15629 0.06734 2.321 0.0203 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD -0.443
## DLIT_AG -0.243 -0.022
## endocr_011 -0.057 0.045 -0.171
## endocr_021 -0.020 0.031 -0.096 -0.016
## B_BLOK_S_n1 0.518 0.024 -0.008 0.058 0.020
## fibr_tr_051 0.000 0.000 0.000 0.000 0.000 0.000
## ant_im -0.213 -0.072 0.024 -0.003 -0.008 0.008 0.000
## lat_im -0.157 -0.036 0.059 0.002 0.003 -0.027 0.000 -0.423
## inf_im -0.295 -0.081 0.080 -0.054 -0.048 0.047 0.000 0.446 0.142
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
# Confusion Matrix
predicted_probs <- predict(m6, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 51
## Deceased 2 3
##
## Accuracy : 0.8437
## 95% CI : (0.8005, 0.8806)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4771
##
## Kappa : 0.0768
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.99298
## Specificity : 0.05556
## Pos Pred Value : 0.84731
## Neg Pred Value : 0.60000
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98525
## Balanced Accuracy : 0.52427
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7704678
# multilevel model 7 of random intercept and random slope of structured variable "fibr_ter_05"
m7 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 +
B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im + (1 + fibr_ter_05 | AgeGroup),
data = train_set,
family = binomial(link="logit"),
nAGQ = 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
print(summary(m7))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +
## fibr_ter_05 + ant_im + lat_im + inf_im + (1 + fibr_ter_05 | AgeGroup)
## Data: train_set
##
## AIC BIC logLik deviance df.resid
## 1103.6 1171.4 -538.8 1077.6 1348
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4568 -0.4529 -0.3344 -0.2150 5.4321
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## AgeGroup (Intercept) 0.3978 0.6307
## fibr_ter_051 1.1485 1.0717 -0.08
## Number of obs: 1361, groups: AgeGroup, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.73145 0.42809 -8.716 < 2e-16 ***
## L_BLOOD 0.10458 0.02178 4.801 1.57e-06 ***
## DLIT_AG 0.04321 0.02856 1.513 0.1303
## endocr_011 0.30294 0.20616 1.469 0.1417
## endocr_021 0.83182 0.41458 2.006 0.0448 *
## B_BLOK_S_n1 -0.67222 0.31261 -2.150 0.0315 *
## fibr_ter_051 -20.12832 512.01631 -0.039 0.9686
## ant_im 0.26957 0.05917 4.556 5.22e-06 ***
## lat_im 0.12852 0.10221 1.257 0.2086
## inf_im 0.15160 0.06712 2.259 0.0239 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD -0.425
## DLIT_AG -0.219 -0.025
## endocr_011 -0.051 0.043 -0.164
## endocr_021 -0.019 0.032 -0.097 -0.016
## B_BLOK_S_n1 -0.077 0.038 -0.053 0.073 0.027
## fibr_tr_051 -0.007 0.003 0.002 0.000 0.001 -0.001
## ant_im -0.202 -0.075 0.026 -0.003 -0.011 0.003 0.002
## lat_im -0.149 -0.040 0.052 -0.002 0.005 -0.053 0.001 -0.425
## inf_im -0.278 -0.086 0.079 -0.054 -0.049 0.062 0.002 0.445 0.136
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
# Confusion Matrix
predicted_probs <- predict(m7, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 51
## Deceased 2 3
##
## Accuracy : 0.8437
## 95% CI : (0.8005, 0.8806)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4771
##
## Kappa : 0.0768
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.99298
## Specificity : 0.05556
## Pos Pred Value : 0.84731
## Neg Pred Value : 0.60000
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98525
## Balanced Accuracy : 0.52427
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7748213
# multilevel model 8 of random intercept and random slope of structured variable "ant_im"
m8 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 +
B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im + (1 + ant_im | AgeGroup),
data = train_set,
family = binomial(link="logit"),
nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m8))
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +
## fibr_ter_05 + ant_im + lat_im + inf_im + (1 + ant_im | AgeGroup)
## Data: train_set
##
## AIC BIC logLik deviance df.resid
## 1103.5 1171.3 -538.7 1077.5 1348
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4687 -0.4504 -0.3346 -0.2167 5.3295
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## AgeGroup (Intercept) 0.3491161 0.59086
## ant_im 0.0003425 0.01851 1.00
## Number of obs: 1361, groups: AgeGroup, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.70878 0.41098 -9.024 < 2e-16 ***
## L_BLOOD 0.10453 0.02179 4.798 1.60e-06 ***
## DLIT_AG 0.04306 0.02848 1.512 0.1305
## endocr_011 0.30476 0.20626 1.477 0.1395
## endocr_021 0.83067 0.41411 2.006 0.0449 *
## B_BLOK_S_n1 -0.66766 0.31326 -2.131 0.0331 *
## fibr_ter_051 -13.42857 636.57579 -0.021 0.9832
## ant_im 0.25923 0.06002 4.319 1.57e-05 ***
## lat_im 0.13109 0.10234 1.281 0.2002
## inf_im 0.15102 0.06717 2.248 0.0246 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD -0.441
## DLIT_AG -0.240 -0.024
## endocr_011 -0.057 0.043 -0.167
## endocr_021 -0.019 0.032 -0.097 -0.016
## B_BLOK_S_n1 -0.075 0.038 -0.049 0.075 0.027
## fibr_tr_051 0.000 0.000 0.000 0.000 0.000 0.000
## ant_im -0.091 -0.075 0.030 -0.002 -0.011 0.000 0.000
## lat_im -0.152 -0.041 0.056 -0.001 0.005 -0.053 0.000 -0.421
## inf_im -0.290 -0.086 0.080 -0.053 -0.049 0.063 0.000 0.439 0.137
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m8, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 51
## Deceased 2 3
##
## Accuracy : 0.8437
## 95% CI : (0.8005, 0.8806)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4771
##
## Kappa : 0.0768
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.99298
## Specificity : 0.05556
## Pos Pred Value : 0.84731
## Neg Pred Value : 0.60000
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98525
## Balanced Accuracy : 0.52427
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7754061
# multilevel model 9 of random intercept and random slope of structured variable "lat_im"
m9 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 +
B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im + (1 + lat_im | AgeGroup),
data = train_set,
family = binomial(link="logit"),
nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m9))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +
## fibr_ter_05 + ant_im + lat_im + inf_im + (1 + lat_im | AgeGroup)
## Data: train_set
##
## AIC BIC logLik deviance df.resid
## 1102.3 1170.1 -538.1 1076.3 1348
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4873 -0.4472 -0.3325 -0.2184 5.3102
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## AgeGroup (Intercept) 0.23837 0.4882
## lat_im 0.01853 0.1361 1.00
## Number of obs: 1361, groups: AgeGroup, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.66326 0.38112 -9.612 < 2e-16 ***
## L_BLOOD 0.10463 0.02182 4.796 1.62e-06 ***
## DLIT_AG 0.04292 0.02861 1.500 0.1335
## endocr_011 0.31099 0.20711 1.502 0.1332
## endocr_021 0.82254 0.41454 1.984 0.0472 *
## B_BLOK_S_n1 -0.64449 0.31330 -2.057 0.0397 *
## fibr_ter_051 -13.30000 147.80185 -0.090 0.9283
## ant_im 0.26903 0.05926 4.540 5.63e-06 ***
## lat_im 0.07266 0.13514 0.538 0.5908
## inf_im 0.15141 0.06733 2.249 0.0245 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD -0.478
## DLIT_AG -0.249 -0.026
## endocr_011 -0.053 0.043 -0.163
## endocr_021 -0.026 0.031 -0.097 -0.018
## B_BLOK_S_n1 -0.080 0.041 -0.051 0.078 0.025
## fibr_tr_051 0.000 0.000 0.000 0.000 0.001 0.000
## ant_im -0.226 -0.076 0.027 -0.003 -0.011 0.001 0.000
## lat_im 0.147 -0.028 0.046 -0.012 0.017 -0.059 0.000 -0.324
## inf_im -0.317 -0.086 0.080 -0.052 -0.049 0.067 0.000 0.444 0.114
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m9, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 51
## Deceased 2 3
##
## Accuracy : 0.8437
## 95% CI : (0.8005, 0.8806)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4771
##
## Kappa : 0.0768
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.99298
## Specificity : 0.05556
## Pos Pred Value : 0.84731
## Neg Pred Value : 0.60000
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98525
## Balanced Accuracy : 0.52427
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7710526
# multilevel model 10 of random intercept and random slope of structured variable "inf_im"
m10 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 +
B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im + (1 + inf_im | AgeGroup),
data = train_set,
family = binomial(link="logit"),
nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m10))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +
## fibr_ter_05 + ant_im + lat_im + inf_im + (1 + inf_im | AgeGroup)
## Data: train_set
##
## AIC BIC logLik deviance df.resid
## 1103.4 1171.3 -538.7 1077.4 1348
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4646 -0.4518 -0.3346 -0.2143 5.4287
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## AgeGroup (Intercept) 0.4286674 0.65473
## inf_im 0.0006927 0.02632 -1.00
## Number of obs: 1361, groups: AgeGroup, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.74446 0.43910 -8.528 < 2e-16 ***
## L_BLOOD 0.10423 0.02181 4.778 1.77e-06 ***
## DLIT_AG 0.04278 0.02860 1.496 0.1347
## endocr_011 0.30466 0.20631 1.477 0.1398
## endocr_021 0.83100 0.41455 2.005 0.0450 *
## B_BLOK_S_n1 -0.66990 0.31282 -2.141 0.0322 *
## fibr_ter_051 -19.39913 209.02429 -0.093 0.9261
## ant_im 0.27067 0.05930 4.565 5.00e-06 ***
## lat_im 0.13051 0.10241 1.274 0.2025
## inf_im 0.16725 0.08264 2.024 0.0430 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD -0.409
## DLIT_AG -0.208 -0.023
## endocr_011 -0.052 0.042 -0.165
## endocr_021 -0.018 0.033 -0.097 -0.017
## B_BLOK_S_n1 -0.077 0.037 -0.055 0.074 0.028
## fibr_tr_051 -0.001 0.000 0.000 0.000 -0.003 0.001
## ant_im -0.201 -0.077 0.023 -0.002 -0.011 0.003 0.000
## lat_im -0.150 -0.044 0.049 0.000 0.005 -0.052 0.000 -0.420
## inf_im -0.394 -0.099 0.036 -0.029 -0.044 0.064 0.001 0.393 0.144
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m10, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Alive Deceased
## Alive 283 51
## Deceased 2 3
##
## Accuracy : 0.8437
## 95% CI : (0.8005, 0.8806)
## No Information Rate : 0.8407
## P-Value [Acc > NIR] : 0.4771
##
## Kappa : 0.0768
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.99298
## Specificity : 0.05556
## Pos Pred Value : 0.84731
## Neg Pred Value : 0.60000
## Prevalence : 0.84071
## Detection Rate : 0.83481
## Detection Prevalence : 0.98525
## Balanced Accuracy : 0.52427
##
## 'Positive' Class : Alive
##
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")
auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7733268